In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

Reading the datafile¶

In [2]:
df = pd.read_csv('C:/Anaconda/xdrip3-mod.csv')
df.head()
Out[2]:
DAY TIME UDT_CGMS BG_LEVEL CH_GR BOLUS REMARK
0 02.09.2022 00:01 211.0 NaN NaN NaN NaN
1 02.09.2022 00:06 217.0 NaN NaN NaN NaN
2 02.09.2022 00:11 226.0 NaN NaN NaN NaN
3 02.09.2022 00:16 224.0 NaN NaN NaN NaN
4 02.09.2022 00:21 220.0 NaN NaN NaN NaN
In [3]:
df.describe()
Out[3]:
UDT_CGMS BG_LEVEL CH_GR BOLUS
count 27702.000000 0.0 523.000000 875.000000
mean 147.803877 NaN 29.344168 2.564971
std 49.106456 NaN 15.159690 1.731730
min 39.000000 NaN 5.000000 0.050000
25% 111.000000 NaN 20.000000 1.350000
50% 139.000000 NaN 25.000000 2.000000
75% 178.000000 NaN 40.000000 3.425000
max 401.000000 NaN 80.000000 10.050000
In [4]:
mapping = {
    'DAY': 'Date',
    'TIME': 'Time',
    'UDT_CGMS': 'Glucose value'
}
df = df.rename(columns=mapping)
df
Out[4]:
Date Time Glucose value BG_LEVEL CH_GR BOLUS REMARK
0 02.09.2022 00:01 211.0 NaN NaN NaN NaN
1 02.09.2022 00:06 217.0 NaN NaN NaN NaN
2 02.09.2022 00:11 226.0 NaN NaN NaN NaN
3 02.09.2022 00:16 224.0 NaN NaN NaN NaN
4 02.09.2022 00:21 220.0 NaN NaN NaN NaN
... ... ... ... ... ... ... ...
29227 01.12.2022 20:32 NaN NaN NaN 3.10 NaN
29228 01.12.2022 20:49 NaN NaN 25.0 NaN NaN
29229 01.12.2022 20:49 NaN NaN NaN 2.25 NaN
29230 02.12.2022 18:51 NaN NaN NaN NaN Stopped by transmitter: Stopped
29231 02.12.2022 19:27 NaN NaN NaN NaN Started by xDrip

29232 rows × 7 columns

In [5]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29232 entries, 0 to 29231
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           29232 non-null  object 
 1   Time           29232 non-null  object 
 2   Glucose value  27702 non-null  float64
 3   BG_LEVEL       0 non-null      float64
 4   CH_GR          523 non-null    float64
 5   BOLUS          875 non-null    float64
 6   REMARK         140 non-null    object 
dtypes: float64(4), object(3)
memory usage: 1.6+ MB

Data Preprocessing¶

In [6]:
# Remove rows with NaN in 'Glucose value' column
df = df.dropna(subset=['Glucose value'])

Marjorie dataset CGM - Continuous Glucose Monitoring Data plotting using plotly¶

It has built-in interactivity for zooming, panning

In [7]:
# Create a line chart using Plotly
fig = go.Figure(data=go.Scatter(x=df['Date'], y=df['Glucose value'], mode='lines'))

# Add title and axis labels
fig.update_layout(title='Glucose Reading for all dates from 2 Sep 2022 to 8 Dec 2022', xaxis_title='Date', yaxis_title='Glucose Reading')

# Show the plot
fig.show()
In [8]:
import plotly.graph_objects as go

# Create a line chart using Plotly
fig = go.Figure(data=go.Scatter(x=df['Date'], y=df['Glucose value'], mode='lines'))

# Add title and axis labels
fig.update_layout(title='Glucose Reading for all dates from 2 Sep 2022 to 8 Dec 2022', xaxis_title='Date', yaxis_title='Glucose Reading')

# Set the initial x-axis and y-axis range (you can adjust the values accordingly)
fig.update_xaxes(range=[0, 10])  # Set the range of the x-axis to be between 0 and 10
fig.update_yaxes(range=[80, 180])  # Set the range of the y-axis to be between 80 and 180

# Show the plot
fig.show()

Glucose data for a specific day¶

In [9]:
# # Load data from the Excel file
# file_path = 'C:/Anaconda/xdrip2-mod.csv'
# df = pd.read_csv(file_path)

# Convert 'Date' column to datetime type with correct format
df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y')
df['Time'] = pd.to_datetime(df['Time'], format='%H:%M')

# # Preprocess the data by filling missing values in 'UDT_CGMS' column with 0
# df['UDT_CGMS'] = df['UDT_CGMS'].fillna(0)

# Filter data for a single day (e.g., '2022-09-02')
selected_day = '2022-12-07'

df_single_day = df[df['Date'] == selected_day]

# Create the time series plot
fig = go.Figure()

fig.add_trace(go.Scatter(x=df_single_day['Time'], y=df_single_day['Glucose value'],
                         mode='lines',
                         line=dict(color='blue'),
                         name='Glucose value'))

# Add dashed lines for thresholds
thresholds = [180, 100, 35]
for threshold in thresholds:
    fig.add_shape(type='line', x0=min(df_single_day['Time']), x1=max(df_single_day['Time']),
                  y0=threshold, y1=threshold,
                  line=dict(color='red', width=1, dash='dash'),
                  name=f'Threshold {threshold}')

# Update x-axis and y-axis labels
fig.update_xaxes(title_text='Time', tickformat='%H:%M')
fig.update_yaxes(title_text='Glucose value')

# Set plot title
fig.update_layout(title_text=f'Time Series Plot - Glucose value for {selected_day}')

# Show the plot
fig.show()
C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\80342810.py:6: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\80342810.py:7: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Glucose data of all days superimposed¶

In [10]:
# Create a figure
fig = go.Figure()

# Group data by date and create traces for each day
for day, data in df.groupby('Date'):
    formatted_date = f"{day.day} {day.strftime('%b')} {day.year}" 
    fig.add_trace(go.Scatter(x=data['Time'], y=data['Glucose value'],
                             mode='lines',
                             name=f'Glucose value - {formatted_date}'))

# Count the number of days plotted
num_days = len(df['Date'].unique())

# Add dashed lines for thresholds
thresholds = [180, 100, 35]
for threshold in thresholds:
    fig.add_shape(type='line', x0=min(df['Time']), x1=max(df['Time']),
                  y0=threshold, y1=threshold,
                  line=dict(color='red', width=1, dash='dash'),
                  name=f'Threshold {threshold}')

# Update x-axis and y-axis labels
fig.update_xaxes(title_text='Time', tickformat='%H:%M')
fig.update_yaxes(title_text='Glucose value')

# Set plot title including the number of days
fig.update_layout(title_text=f'Continuous glucose monitoring time Series plot -  for {num_days} days')

# Show the plot
fig.show()

Time series plot for all the data¶

In [11]:
# Combine 'Date' and 'Time' columns into a single 'Datetime' column
df['DATETIME'] = df['Date'] + pd.to_timedelta(df['Time'].dt.hour, unit='h') + pd.to_timedelta(df['Time'].dt.minute, unit='m')

# Create the Plotly figure
fig = px.line(df, x='DATETIME', y='Glucose value', title='Glucose Readings Time Series Plot')

# Add dashed lines for thresholds
thresholds = [180, 100, 35]
for threshold in thresholds:
    fig.add_shape(type='line', x0=min(df['DATETIME']), x1=max(df['DATETIME']),
                  y0=threshold, y1=threshold,
                  line=dict(color='red', width=1, dash='dash'),
                  name=f'Threshold {threshold}')

# Add a range slider for selecting the date range
fig.update_xaxes(rangeslider_visible=True)

# Show the interactive plot
fig.show()
C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\3956109903.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Daily view enabling selection of day and hour¶

In [12]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# Create a time-only column without date information
df['TimeOnly'] = df['Time'].apply(lambda x: x.time())

# Create a Dash app
app = dash.Dash(__name__)

# Define the app layout
app.layout = html.Div([
    dcc.Graph(id='glucose-plot'),
    dcc.Slider(
        id='date-slider',
        min=df['Date'].min().date().toordinal(),
        max=df['Date'].max().date().toordinal(),
        value=df['Date'].min().date().toordinal(),
        marks={d.toordinal(): d.strftime('%Y-%m') for i, d in enumerate(df['Date']) if d.day == 1},
        step=1
    ),
    dcc.RangeSlider(
        id='time-range-slider',
        min=0,
        max=24,
        step=0.5,
        value=[0, 24],
        marks={i: f"{i}:00" for i in range(0, 25, 2)}
    )
])

# Create a callback to update the plot based on the slider values
@app.callback(
    Output('glucose-plot', 'figure'),
    [Input('date-slider', 'value'),
     Input('time-range-slider', 'value')]
)
def update_graph(selected_date_ordinal, time_range):
    selected_date = pd.Timestamp.fromordinal(selected_date_ordinal)
    filtered_df = df[df['Date'] == selected_date]
    start_hour, end_hour = time_range
    
    filtered_df = filtered_df[
        (filtered_df['TimeOnly'].apply(lambda x: x.hour) >= start_hour) & 
        (filtered_df['TimeOnly'].apply(lambda x: x.hour) <= end_hour)
    ]
    
    fig = go.Figure()
    for day, data in filtered_df.groupby('Date'):
        trace = go.Scatter(x=data['TimeOnly'], y=data['Glucose value'], mode='lines', name=str(day.date()))
        fig.add_trace(trace)
    
    # Add dashed lines for thresholds
    thresholds = [180, 100, 35]
    for threshold in thresholds:
        fig.add_shape(type='line', x0=min(filtered_df['TimeOnly']), x1=max(filtered_df['TimeOnly']),
                      y0=threshold, y1=threshold,
                      line=dict(color='red', width=1, dash='dash'),
                      name=f'Threshold {threshold}')
    
    fig.update_layout(
        xaxis_title='Time',
        yaxis_title='Glucose value',
        title='Glucose Readings Time Series',
        showlegend=True
    )
    
    return fig

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True, port=8051)
C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\3131285681.py:6: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Stack of all days¶

In [13]:
# Create a separate graph for each day's data
for day, data in df.groupby('Date'):
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=data['Time'], y=data['Glucose value'],
                             mode='lines',
                             line=dict(color='blue'),
                             name='Glucose value'))
    # Add dashed lines for thresholds
    thresholds = [180, 100, 35]
    for threshold in thresholds:
        fig.add_shape(type='line', x0=min(data['Time']), x1=max(data['Time']),
                      y0=threshold, y1=threshold,
                      line=dict(color='red', width=1, dash='dash'),
                      name=f'Threshold {threshold}')

    # Update x-axis and y-axis labels
    fig.update_xaxes(title_text='Time', tickformat='%H:%M')
    fig.update_yaxes(title_text='Glucose value')

    # Set plot title
    fig.update_layout(title_text=f'Time Series Plot - Glucose value for {day.strftime("%Y-%m-%d")}',
                      showlegend=True)

    # Show the plot for each day
    fig.show()

Removing connecting lines and plotting all data¶

In [14]:
# Create a line chart using Plotly
fig = go.Figure(data=go.Scatter(x=df['Date'], y=df['Glucose value'], mode='markers'))

# Dashed lines for thresholds
thresholds = [180, 100, 35]
for threshold in thresholds:
    fig.add_shape(type='line', x0=min(df['Date']), x1=max(df['Date']),
                  y0=threshold, y1=threshold,
                  line=dict(color='red', width=1, dash='dash'),
                  name=f'Threshold {threshold}')

# Add title and axis labels
fig.update_layout(title='Glucose Reading for all Dates', xaxis_title='Date', yaxis_title='Glucose Reading')

# Show the plot
fig.show()

Summary plot of median and quartiles for each 5-minute interval¶

In [15]:
# Calculate median and quartiles for each 5-minute interval
interval_length = pd.Timedelta(minutes=5)
df['Interval'] = (df['Time'].dt.hour * 60 + df['Time'].dt.minute) // 5
interval_stats = df.groupby(['Interval'])['Glucose value'].agg([np.median, lambda x: np.percentile(x, 25), lambda x: np.percentile(x, 75)]).reset_index()
interval_stats['Time'] = pd.to_timedelta(interval_stats['Interval'] * 5, unit='m')

# Define a function to convert time intervals back to meaningful time values
def convert_interval_to_time(interval):
    total_minutes = interval.total_seconds() / 60
    hours = int(total_minutes // 60)
    minutes = int(total_minutes % 60)
    return f"{hours:02d}:{minutes:02d}"

# Apply the conversion function to create x-axis labels
interval_stats['Time_Labels'] = interval_stats['Time'].apply(convert_interval_to_time)

# Create a single trace for median, Q1, and Q3
median_trace = go.Scatter(
    x=interval_stats['Time'],
    y=interval_stats['median'],
    mode='lines+markers',
    name='Median',
    line=dict(color='red', dash='dash')
)

q1_trace = go.Scatter(
    x=interval_stats['Time'],
    y=interval_stats['<lambda_0>'],
    mode='lines+markers',
    name='Q1',
    line=dict(color='blue')
)

q3_trace = go.Scatter(
    x=interval_stats['Time'],
    y=interval_stats['<lambda_1>'],
    mode='lines+markers',
    name='Q3',
    line=dict(color='green')
)

# Create the layout
layout = go.Layout(
    title='Time Series Plot with Median and Quartiles - Glucose value for All Days',
    xaxis=dict(title='Time', tickvals=interval_stats['Time'], ticktext=interval_stats['Time_Labels']),
    yaxis=dict(title='Glucose value')
)

# Create the figure
fig = go.Figure(data=[median_trace, q1_trace, q3_trace], layout=layout)

# Show the plot
fig.show()
C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\3061573823.py:3: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Threshold based events¶

Time series plot for glucose value above 180mg/dl for continuous time of 15min or more¶

In [16]:
# Combine 'DAY' and 'TIME' columns to create a datetime column
df['DATETIME'] = df['Date'] + pd.to_timedelta(df['Time'].dt.hour, unit='h') + pd.to_timedelta(df['Time'].dt.minute, unit='m')

# Sort the dataframe by 'DATETIME'
df = df.sort_values('DATETIME')

# Reset index
df.reset_index(drop=True, inplace=True)
C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\1445165374.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [17]:
above_threshold_events = []
threshold = 180
continuous_minutes = 15

current_event_start = None
for index, row in df.iterrows():
    if row['Glucose value'] > threshold:
        if current_event_start is None:
            current_event_start = row['DATETIME']
    else:
        if current_event_start is not None:
            event_duration = (row['DATETIME'] - current_event_start).total_seconds() / 60
            if event_duration >= continuous_minutes:
                above_threshold_events.append((current_event_start, row['DATETIME']))
            current_event_start = None

# Convert the events to a dataframe
events_df = pd.DataFrame(above_threshold_events, columns=['Start', 'End'])
In [18]:
import plotly.express as px

# Create the time series plot
fig = px.line(df, x='DATETIME', y='Glucose value', title='Glucose value time Series Plot for Above 180mg/dl')

# Add dashed line at the glucose threshold
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold,
    line=dict(color="red", dash="dash"),
)

# Add rectangles for threshold-based events
for _, event in events_df.iterrows():
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,  # Adjust the y1 value to fit your data range
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
    )

# Show the plot
fig.show()

Time series plot for glucose value below 60mg/dl for continuous time of 15min or more¶

In [19]:
below_threshold_events_60 = []
threshold_60 = 60
continuous_minutes = 15

current_event_start = None
for index, row in df.iterrows():
    if row['Glucose value'] < threshold_60:
        if current_event_start is None:
            current_event_start = row['DATETIME']
    else:
        if current_event_start is not None:
            event_duration = (row['DATETIME'] - current_event_start).total_seconds() / 60
            if event_duration >= continuous_minutes:
                below_threshold_events_60.append((current_event_start, row['DATETIME']))
            current_event_start = None

# Convert the events to dataframes
events_df_60 = pd.DataFrame(below_threshold_events_60, columns=['Start', 'End'])

# Plot for below 60
fig_below_60 = px.line(df, x='DATETIME', y='Glucose value', title='Glucose Value - Time Series Plot below 60mg/dl')
fig_below_60.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_60,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_60,
    line=dict(color="red", dash="dash"),
)
for _, event in events_df_60.iterrows():
    fig_below_60.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
    )
fig_below_60.show()

Time series plot for glucose value below 35 mg/dl for continuous time of 15min or more¶

In [20]:
below_threshold_events_35 = []
threshold_35 = 35
continuous_minutes = 15

current_event_start = None
for index, row in df.iterrows():
    if row['Glucose value'] < threshold_35:
        if current_event_start is None:
            current_event_start = row['DATETIME']
    else:
        if current_event_start is not None:
            event_duration = (row['DATETIME'] - current_event_start).total_seconds() / 60
            if event_duration >= continuous_minutes:
                below_threshold_events_35.append((current_event_start, row['DATETIME']))
            current_event_start = None

# Convert the events to dataframes
events_df_35 = pd.DataFrame(below_threshold_events_35, columns=['Start', 'End'])

# Plot for below 35
fig_below_35 = px.line(df, x='DATETIME', y='Glucose value', title='Glucose Value - Time Series Plot below 35mg/dl')
fig_below_35.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_35,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_35,
    line=dict(color="red", dash="dash"),
)
for _, event in events_df_35.iterrows():
    fig_below_35.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
    )
fig_below_35.show()

Histogram of glucose values¶

In [21]:
# Create a histogram of glucose values
fig = px.histogram(
    df,
    x='Glucose value',
    nbins=20,  
    labels={'x': 'Glucose value', 'y': 'Count'},
)
fig.update_layout(title_text='Histogram of Glucose Values')

fig.show()

Summary plot of median and quartiles for each 60-minute interval¶

In [22]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go

# Calculate median and quartiles for each 60-minute interval
interval_length = pd.Timedelta(minutes=60)
df['Interval'] = (df['Time'].dt.hour * 60 + df['Time'].dt.minute) // 60
interval_stats = df.groupby(['Interval'])['Glucose value'].agg([
    np.median,
    lambda x: np.percentile(x, 10),
    lambda x: np.percentile(x, 25),
    lambda x: np.percentile(x, 75),
    lambda x: np.percentile(x, 90)
]).reset_index()
interval_stats['Time'] = pd.to_timedelta(interval_stats['Interval'] * 60, unit='m')

# Define a function to convert time intervals back to meaningful time values
def convert_interval_to_time(interval):
    total_minutes = interval.total_seconds() / 60
    hours = int(total_minutes // 60)
    minutes = int(total_minutes % 60)
    return f"{hours:02d}:{minutes:02d}"

# Apply the conversion function to create x-axis labels
interval_stats['Time_Labels'] = interval_stats['Time'].apply(convert_interval_to_time)

# Create traces for median, quartiles, and percentiles
median_trace = go.Scatter(
    x=interval_stats['Time'],
    y=interval_stats['median'],
    mode='lines',
    name='Median',
    line=dict(color='red', dash='dash')
)

p10_trace = go.Scatter(
    x=interval_stats['Time'],
    y=interval_stats['<lambda_0>'],
    mode='lines',
    name='10th Percentile',
    line=dict(color='purple')
)

q1_trace = go.Scatter(
    x=interval_stats['Time'],
    y=interval_stats['<lambda_1>'],
    mode='lines',
    name='25th Percentile',
    line=dict(color='blue')
)

q3_trace = go.Scatter(
    x=interval_stats['Time'],
    y=interval_stats['<lambda_2>'],
    mode='lines',
    name='75th Percentile',
    line=dict(color='green')
)

p90_trace = go.Scatter(
    x=interval_stats['Time'],
    y=interval_stats['<lambda_3>'],
    mode='lines',
    name='90th Percentile',
    line=dict(color='orange')
)

# Create traces for shaded regions
shade_25_75 = go.Scatter(
    x=interval_stats['Time'].append(interval_stats['Time'][::-1]),
    y=interval_stats['<lambda_2>'].append(interval_stats['<lambda_1>'][::-1]),
    fill='toself',
    fillcolor='rgba(0,255,0,0.2)',  # Light green shading
    line=dict(color='rgba(255,255,255,0)'),
    name='25th to 75th Percentile'
)

shade_10_90 = go.Scatter(
    x=interval_stats['Time'].append(interval_stats['Time'][::-1]),
    y=interval_stats['<lambda_0>'].append(interval_stats['<lambda_3>'][::-1]),
    fill='toself',
    fillcolor='rgba(255,182,193,0.2)',  # Light pink shading
    line=dict(color='rgba(255,255,255,0)'),
    name='10th to 90th Percentile'
)

# Create the layout
layout = go.Layout(
    title='Time Series Plot with Median and Percentiles - Glucose value for Every Hour',
    xaxis=dict(title='Time', tickvals=interval_stats['Time'], ticktext=interval_stats['Time_Labels']),
    yaxis=dict(title='Glucose value', dtick=60),  # Set dtick to 60 for y-axis
    showlegend=True
)

# Create the figure
fig = go.Figure(data=[median_trace, p10_trace, q1_trace, q3_trace, p90_trace, shade_25_75, shade_10_90], layout=layout)

# Show the plot
fig.show()

Summary plot of median and quartiles for each 60-minute interval with calendar¶

In [23]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output


# Calculate median and quartiles for each 60-minute interval
interval_length = pd.Timedelta(minutes=60)
df['Interval'] = (df['DATETIME'].dt.hour * 60 + df['DATETIME'].dt.minute) // 60
interval_stats = df.groupby(['Interval'])['Glucose value'].agg([
    np.median,
    lambda x: np.percentile(x, 10),
    lambda x: np.percentile(x, 25),
    lambda x: np.percentile(x, 75),
    lambda x: np.percentile(x, 90)
]).reset_index()
interval_stats['DATETIME'] = pd.to_timedelta(interval_stats['Interval'] * 60, unit='m')

# Define a function to convert time intervals back to meaningful time values
def convert_interval_to_time(interval):
    total_minutes = interval.total_seconds() / 60
    hours = int(total_minutes // 60)
    minutes = int(total_minutes % 60)
    return f"{hours:02d}:{minutes:02d}"

# Apply the conversion function to create x-axis labels
interval_stats['Time_Labels'] = interval_stats['DATETIME'].apply(convert_interval_to_time)

# Create the Dash app
app = dash.Dash(__name__)

# Define the app layout
app.layout = html.Div([
    dcc.DatePickerRange(
        id='date-picker',
        start_date=df['DATETIME'].min().date(),
        end_date=df['DATETIME'].max().date(),
        display_format='YYYY-MM-DD'
    ),
    dcc.Graph(id='glucose-plot')
])

# Define callback to update the graph based on selected dates
@app.callback(
    Output('glucose-plot', 'figure'),
    [Input('date-picker', 'start_date'),
     Input('date-picker', 'end_date')]
)
def update_graph(start_date, end_date):
    filtered_df = df[(df['DATETIME'].dt.date >= pd.to_datetime(start_date)) & (df['DATETIME'].dt.date <= pd.to_datetime(end_date))]
    
    # Calculate interval_stats for the filtered data
    filtered_interval_stats = filtered_df.groupby(['Interval'])['Glucose value'].agg([
        np.median,
        lambda x: np.percentile(x, 10),
        lambda x: np.percentile(x, 25),
        lambda x: np.percentile(x, 75),
        lambda x: np.percentile(x, 90)
    ]).reset_index()
    filtered_interval_stats['DATETIME'] = pd.to_timedelta(filtered_interval_stats['Interval'] * 60, unit='m')

    # Apply the conversion function to create x-axis labels
    filtered_interval_stats['Time_Labels'] = filtered_interval_stats['DATETIME'].apply(convert_interval_to_time)

    # Create traces for median, quartiles, and percentiles
    median_trace = go.Scatter(
        x=filtered_interval_stats['DATETIME'],
        y=filtered_interval_stats['median'],
        mode='lines',
        name='Median',
        line=dict(color='red', dash='dash')
    )

    p10_trace = go.Scatter(
        x=filtered_interval_stats['DATETIME'],
        y=filtered_interval_stats['<lambda_0>'],
        mode='lines',
        name='10th Percentile',
        line=dict(color='purple')
    )

    q1_trace = go.Scatter(
        x=filtered_interval_stats['DATETIME'],
        y=filtered_interval_stats['<lambda_1>'],
        mode='lines',
        name='25th Percentile',
        line=dict(color='blue')
    )

    q3_trace = go.Scatter(
        x=filtered_interval_stats['DATETIME'],
        y=filtered_interval_stats['<lambda_2>'],
        mode='lines',
        name='75th Percentile',
        line=dict(color='green')
    )

    p90_trace = go.Scatter(
        x=filtered_interval_stats['DATETIME'],
        y=filtered_interval_stats['<lambda_3>'],
        mode='lines',
        name='90th Percentile',
        line=dict(color='orange')
    )

    # Create traces for shaded regions
    shade_25_75 = go.Scatter(
        x=filtered_interval_stats['DATETIME'].append(filtered_interval_stats['DATETIME'][::-1]),
        y=filtered_interval_stats['<lambda_2>'].append(filtered_interval_stats['<lambda_1>'][::-1]),
        fill='toself',
        fillcolor='rgba(0,255,0,0.2)',  # Light green shading
        line=dict(color='rgba(255,255,255,0)'),
        name='25th to 75th Percentile'
    )

    shade_10_90 = go.Scatter(
        x=filtered_interval_stats['DATETIME'].append(filtered_interval_stats['DATETIME'][::-1]),
        y=filtered_interval_stats['<lambda_0>'].append(filtered_interval_stats['<lambda_3>'][::-1]),
        fill='toself',
        fillcolor='rgba(255,182,193,0.2)',  # Light pink shading
        line=dict(color='rgba(255,255,255,0)'),
        name='10th to 90th Percentile'
    )
    
    # Add dashed lines for thresholds
    thresholds = [250, 180, 70, 60, 50]
    threshold_shapes = []
    for threshold in thresholds:
        threshold_shapes.append(
            {
                'type': 'line',
                'x0': min(filtered_interval_stats['DATETIME']),
                'x1': max(filtered_interval_stats['DATETIME']),
                'y0': threshold,
                'y1': threshold,
                'line': {'color': 'gray', 'width': 1, 'dash': 'dash'},
                'name': f'Threshold {threshold}'
            }
        )
    
    # Create the layout
    layout = go.Layout(
        title='Time Series Plot with Median and Percentiles - Glucose value for Every Hour',
        xaxis=dict(title='Time', tickvals=interval_stats['DATETIME'], ticktext=interval_stats['Time_Labels']),
        yaxis=dict(title='Glucose value', dtick=60),  # Set dtick to 60 for y-axis
        showlegend=True 
    )
    
        
    fig = go.Figure(data=[median_trace, p10_trace, q1_trace, q3_trace, p90_trace, shade_25_75, shade_10_90], layout=layout)
    return fig

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)
C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\1164876772.py:5: UserWarning:


The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`

C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\1164876772.py:6: UserWarning:


The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`

C:\Anaconda\lib\site-packages\pandas\core\ops\array_ops.py:73: FutureWarning:

Comparison of Timestamp with datetime.date is deprecated in order to match the standard library behavior.  In a future version these will be considered non-comparable.Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.

Pattern Based Events¶

Any increase of +20 mg/dl or more in continuous 10 minutes or less¶

Any decrease of -20 mg/dl or more in continuous 10 minutes or less¶

In [24]:
# Calculate time differences and glucose changes
df['TimeDiff'] = df['DATETIME'].diff().dt.total_seconds() / 60.0
df['GlucoseChange'] = df['Glucose value'].diff()

# Identify increases and decreases exceeding thresholds
increase_mask = (df['GlucoseChange'] >= 20) & (df['TimeDiff'] <= 10)
decrease_mask = (df['GlucoseChange'] <= -20) & (df['TimeDiff'] <= 10)

df['IncreaseEvent'] = increase_mask
df['DecreaseEvent'] = decrease_mask
In [25]:
fig_glucose = px.line(df, x='DATETIME', y='Glucose value', title='Glucose Time Series')
fig_glucose.update_xaxes(title_text='Datetime')
fig_glucose.update_yaxes(title_text='Glucose Value (mg/dl)')
fig_glucose.show()
In [26]:
import plotly.subplots as sp

# Create subplots with two rows (one for increase events, one for decrease events)
fig = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.15, subplot_titles=("Any increase of +20 mg/dl or more in continuous 10 minutes or less", "Any decrease of -20 mg/dl or more in continuous 10 minutes or less"))

# Add increase events as curves with markers
for idx, row in df[increase_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event
    
    # Filter the data to include points within the 10-minute interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',  # Include both lines and markers
            line=dict(color='red'),
            marker=dict(color='red'),  # Marker color
            name='Increase Event'
        ),
        row=1, col=1  # Add to the first subplot (increase events)
    )

# Add decrease events as curves with markers
for idx, row in df[decrease_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event
    
    # Filter the data to include points within the 10-minute interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',  # Include both lines and markers
            line=dict(color='blue'),
            marker=dict(color='blue'),  # Marker color
            name='Decrease Event'
        ),
        row=2, col=1  # Add to the second subplot (decrease events)
    )

fig.update_layout(
    title='Glucose Events Visualization - Any increase of +20 mg/dl or more and Any decrease of -20 mg/dl or more',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False
)

# Adjust subplot titles' positions
fig.update_annotations(
    selector=dict(text='Any increase of +20 mg/dl or more in continuous 10 minutes or less'),
    yshift=5 # Increase the vertical position for the first subplot title
)
fig.update_annotations(
    selector=dict(text='Any decrease of -20 mg/dl or more in continuous 10 minutes or less'),
    yshift=5  # Increase the vertical position for the second subplot title
)

fig.update_xaxes(title_text="Datetime", row=2, col=1)  # Add x-axis title for the second subplot
fig.show()

A rapid increase of +20 mg/dl or greater followed by a rapid decrease of -20mg/dl or more in a timespan of 1 hour or less, a rapid decrease of -20 mg/dl or greater followed by a rapid increase of +20mg/dl or more in a timespan of 1 hour or less¶

In [28]:
import plotly.subplots as sp

# Create subplots with two rows (one for rapid increase-decrease, one for rapid decrease-increase)
fig = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.25, subplot_titles=("Rapid Increase followed by Rapid Decrease in 1 hour or less", "Rapid Decrease followed by Rapid Increase in 1 hour or less"))

# Calculate the rolling 1-hour difference in glucose values
rolling_diff = df['Glucose value'].diff().rolling(window=12).sum()  # 12 data points in 1 hour

# Create masks for rapid increase-decrease and rapid decrease-increase events
rapid_increase_decrease_mask = (
    (rolling_diff >= 20) &  # Rapid increase of +20 mg/dL or greater
    (df['GlucoseChange'] <= -20) &  # Rapid decrease of -20 mg/dL or more
    (df['TimeDiff'] <= 60)  # Within a timespan of 1 hour or less
)

rapid_decrease_increase_mask = (
    (rolling_diff <= -20) &  # Rapid decrease of -20 mg/dL or more
    (df['GlucoseChange'] >= 20) &  # Rapid increase of +20 mg/dL or greater
    (df['TimeDiff'] <= 60)  # Within a timespan of 1 hour or less
)

# Add rapid increase-decrease events as curves with markers
for idx, row in df[rapid_increase_decrease_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=60)  # Calculate the start time as 1 hour before the event
    
    # Filter the data to include points within the 1-hour interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',
            line=dict(color='green'),
            marker=dict(color='green'),
            name='Rapid Inc. followed by Rapid Dec.'
        ),
        row=1, col=1
    )

# Add rapid decrease-increase events as curves with markers
for idx, row in df[rapid_decrease_increase_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=60)  # Calculate the start time as 1 hour before the event
    
    # Filter the data to include points within the 1-hour interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',
            line=dict(color='purple'),
            marker=dict(color='purple'),
            name='Rapid Dec. followed by Rapid Inc.'
        ),
        row=2, col=1
    )

fig.update_layout(
    title='Rapid Glucose Changes Visualization',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False
)

# Adjust subplot titles' positions
fig.update_annotations(
    selector=dict(text='Rapid Increase followed by Rapid Decrease in 1 hour or less'),
    yshift=5
)
fig.update_annotations(
    selector=dict(text='Rapid Decrease followed by Rapid Increase in 1 hour or less'),
    yshift=5
)

fig.update_xaxes(title_text="Datetime", row=2, col=1)
fig.show()

Over correction plot of +/- 30 mg/dl in continuous 10 min or less¶

In [29]:
# Identify overcorrection events
overcorrection_mask = ((df['GlucoseChange'] >= 30) | (df['GlucoseChange'] <= -30)) & (df['TimeDiff'] <= 10)

# Create an empty figure
fig = go.Figure()

# Plot each overcorrection event with exact data points
for idx, row in df[overcorrection_mask].iterrows():
    before_start_time = row['DATETIME'] - pd.Timedelta(minutes=10)
    after_end_time = row['DATETIME']
    subset = df[(df['DATETIME'] >= before_start_time) & (df['DATETIME'] <= after_end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=subset['DATETIME'],
            y=subset['Glucose value'],
            mode='lines+markers',
            line=dict(color='purple'),
            marker=dict(color='purple'),
            name='Overcorrection Event'
        )
    )

fig.update_layout(
    title='Over correction plot of +/- 30 mg/dl in continuous 10 min or less',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False
)
fig.show()

Hybrid plot of pattern-based events and threshold-based events¶

Increase of 20mg/dl, decrease of 20mg/dl events in continuous 10 min or less and threshold is above 180¶

In [30]:
# Create subplots with two rows (one for increase events, one for decrease events)
fig = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.15, subplot_titles=("Any increase of +20 mg/dl or more in continuous 10 minutes or less", "Any decrease of -20 mg/dl or more in continuous 10 minutes or less"))

# Add increase events as curves with markers
for idx, row in df[increase_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event
    
    # Filter the data to include points within the 10-minute interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',  # Include both lines and markers
            line=dict(color='red'),
            marker=dict(color='red'),  # Marker color
            name='Increase Event'
        ),
        row=1, col=1  # Add to the first subplot (increase events)
    )

# Add decrease events as curves with markers
for idx, row in df[decrease_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event
    
    # Filter the data to include points within the 10-minute interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',  # Include both lines and markers
            line=dict(color='blue'),
            marker=dict(color='blue'),  # Marker color
            name='Decrease Event'
        ),
        row=2, col=1  # Add to the second subplot (decrease events)
    )

# Add threshold line to both subplots
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold,
    line=dict(color="red", dash="dash"),
    row=1, col=1  # Add to the first subplot (increase events)
)
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold,
    line=dict(color="red", dash="dash"),
    row=2, col=1  # Add to the second subplot (decrease events)
)

# Add rectangles for threshold-based events to both subplots
for _, event in events_df.iterrows():
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,  # Adjust the y1 value to fit your data range
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=1, col=1  # Add to the first subplot (increase events)
    )
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,  # Adjust the y1 value to fit your data range
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=2, col=1  # Add to the second subplot (decrease events)
    )

fig.update_layout(
    title='Glucose Hybrid Events Visualization with Threshold above 180',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False 
)

# Adjust subplot titles' positions
fig.update_annotations(
    selector=dict(text='Any increase of +20 mg/dl or more in continuous 10 minutes or less'),
    yshift=5  # Increase the vertical position for the first subplot title
)
fig.update_annotations(
    selector=dict(text='Any decrease of -20 mg/dl or more in continuous 10 minutes or less'),
    yshift=5  # Increase the vertical position for the second subplot title
)

fig.update_xaxes(title_text="Datetime", row=2, col=1)  # Add x-axis title for the second subplot

# Show the plot
fig.show()

Increase of 20mg/dl, decrease of 20mg/dl events in continuous 10 min or less and threshold is below 60¶

In [31]:
# Create subplots with two rows (one for increase events, one for decrease events)
fig = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.15, subplot_titles=("Any increase of +20 mg/dl or more in continuous 10 minutes or less", "Any decrease of -20 mg/dl or more in continuous 10 minutes or less"))

# Add increase events as curves with markers
for idx, row in df[increase_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event
    
    # Filter the data to include points within the 10-minute interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',  # Include both lines and markers
            line=dict(color='red'),
            marker=dict(color='red'),  # Marker color
            name='Increase Event'
        ),
        row=1, col=1  # Add to the first subplot (increase events)
    )

# Add decrease events as curves with markers
for idx, row in df[decrease_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event
    
    # Filter the data to include points within the 10-minute interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',  # Include both lines and markers
            line=dict(color='blue'),
            marker=dict(color='blue'),  # Marker color
            name='Decrease Event'
        ),
        row=2, col=1  # Add to the second subplot (decrease events)
    )

# Add threshold line to both subplots for below 60
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_60,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_60,
    line=dict(color="red", dash="dash"),
    row=1, col=1  # Add to the first subplot (increase events)
)
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_60,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_60,
    line=dict(color="red", dash="dash"),
    row=2, col=1  # Add to the second subplot (decrease events)
)

# Add rectangles for threshold-based events to both subplots for below 60
for _, event in events_df_60.iterrows():
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=1, col=1  # Add to the first subplot (increase events)
    )
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=2, col=1  # Add to the second subplot (decrease events)
    )

fig.update_layout(
    title='Glucose Hybrid Events Visualization with Threshold Below 60',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False 
)

# Adjust subplot titles' positions
fig.update_annotations(
    selector=dict(text='Any increase of +20 mg/dl or more in continuous 10 minutes or less'),
    yshift=5  # Increase the vertical position for the first subplot title
)
fig.update_annotations(
    selector=dict(text='Any decrease of -20 mg/dl or more in continuous 10 minutes or less'),
    yshift=5  # Increase the vertical position for the second subplot title
)

fig.update_xaxes(title_text="Datetime", row=2, col=1)  # Add x-axis title for the second subplot

# Show the plot
fig.show()

Increase of 20mg/dl, decrease of 20mg/dl events in continuous 10 min or less and threshold is below 35¶

In [32]:
# Create subplots with two rows (one for increase events, one for decrease events)
fig = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.15, subplot_titles=("Any increase of +20 mg/dl or more in continuous 10 minutes or less", "Any decrease of -20 mg/dl or more in continuous 10 minutes or less"))

# Add increase events as curves with markers
for idx, row in df[increase_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event
    
    # Filter the data to include points within the 10-minute interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',  # Include both lines and markers
            line=dict(color='red'),
            marker=dict(color='red'),  # Marker color
            name='Increase Event'
        ),
        row=1, col=1  # Add to the first subplot (increase events)
    )

# Add decrease events as curves with markers
for idx, row in df[decrease_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event
    
    # Filter the data to include points within the 10-minute interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',  # Include both lines and markers
            line=dict(color='blue'),
            marker=dict(color='blue'),  # Marker color
            name='Decrease Event'
        ),
        row=2, col=1  # Add to the second subplot (decrease events)
    )

# Add threshold line to both subplots for below 35
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_35,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_35,
    line=dict(color="red", dash="dash"),
    row=1, col=1  # Add to the first subplot (increase events)
)
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_35,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_35,
    line=dict(color="red", dash="dash"),
    row=2, col=1  # Add to the second subplot (decrease events)
)

# Add rectangles for threshold-based events to both subplots for below 35
for _, event in events_df_35.iterrows():
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=1, col=1  # Add to the first subplot (increase events)
    )
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=2, col=1  # Add to the second subplot (decrease events)
    )

fig.update_layout(
    title='Glucose Hybrid Events Visualization with Threshold Below 35',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False 
)

# Adjust subplot titles' positions
fig.update_annotations(
    selector=dict(text='Any increase of +20 mg/dl or more in continuous 10 minutes or less'),
    yshift=5  # Increase the vertical position for the first subplot title
)
fig.update_annotations(
    selector=dict(text='Any decrease of -20 mg/dl or more in continuous 10 minutes or less'),
    yshift=5  # Increase the vertical position for the second subplot title
)

fig.update_xaxes(title_text="Datetime", row=2, col=1)  # Add x-axis title for the second subplot

# Show the plot
fig.show()

Rapid Increase followed by decrease, rapid decrease followed by Increase with threshold based events¶

In [33]:
# Create subplots with two rows (one for rapid increase-decrease, one for rapid decrease-increase)
fig = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.25, subplot_titles=("Rapid Increase followed by Rapid Decrease in 1 hour or less", "Rapid Decrease followed by Rapid Increase in 1 hour or less"))

# Calculate the rolling 1-hour difference in glucose values
rolling_diff = df['Glucose value'].diff().rolling(window=12).sum()  # 12 data points in 1 hour

# Create masks for rapid increase-decrease and rapid decrease-increase events
rapid_increase_decrease_mask = (
    (rolling_diff >= 20) &  # Rapid increase of +20 mg/dL or greater
    (df['GlucoseChange'] <= -20) &  # Rapid decrease of -20 mg/dL or more
    (df['TimeDiff'] <= 60)  # Within a timespan of 1 hour or less
)

rapid_decrease_increase_mask = (
    (rolling_diff <= -20) &  # Rapid decrease of -20 mg/dL or more
    (df['GlucoseChange'] >= 20) &  # Rapid increase of +20 mg/dL or greater
    (df['TimeDiff'] <= 60)  # Within a timespan of 1 hour or less
)

# Add rapid increase-decrease events as curves with markers
for idx, row in df[rapid_increase_decrease_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=60)  # Calculate the start time as 1 hour before the event
    
    # Filter the data to include points within the 1-hour interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',
            line=dict(color='green'),
            marker=dict(color='green'),
            name='Rapid Inc. followed by Rapid Dec.'
        ),
        row=1, col=1
    )

# Add rapid decrease-increase events as curves with markers
for idx, row in df[rapid_decrease_increase_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=60)  # Calculate the start time as 1 hour before the event
    
    # Filter the data to include points within the 1-hour interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',
            line=dict(color='purple'),
            marker=dict(color='purple'),
            name='Rapid Dec. followed by Rapid Inc.'
        ),
        row=2, col=1
    )
    
    
# Add threshold line to both subplots
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold,
    line=dict(color="red", dash="dash"),
    row=1, col=1  # Add to the first subplot (increase events)
)
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold,
    line=dict(color="red", dash="dash"),
    row=2, col=1  # Add to the second subplot (decrease events)
)

# Add rectangles for threshold-based events to both subplots
for _, event in events_df.iterrows():
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,  # Adjust the y1 value to fit your data range
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=1, col=1  # Add to the first subplot (increase events)
    )
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,  
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=2, col=1  # Add to the second subplot (decrease events)
    )

    
fig.update_layout(
    title='Rapid Glucose Changes Visualization with threshold above 180',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False
)

# Adjust subplot titles' positions
fig.update_annotations(
    selector=dict(text='Rapid Increase followed by Rapid Decrease in 1 hour or less'),
    yshift=5
)
fig.update_annotations(
    selector=dict(text='Rapid Decrease followed by Rapid Increase in 1 hour or less'),
    yshift=5
)

fig.update_xaxes(title_text="Datetime", row=2, col=1)
fig.show()
In [34]:
# Create subplots with two rows (one for rapid increase-decrease, one for rapid decrease-increase)
fig = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.25, subplot_titles=("Rapid Increase followed by Rapid Decrease in 1 hour or less", "Rapid Decrease followed by Rapid Increase in 1 hour or less"))

# Calculate the rolling 1-hour difference in glucose values
rolling_diff = df['Glucose value'].diff().rolling(window=12).sum()  # 12 data points in 1 hour

# Create masks for rapid increase-decrease and rapid decrease-increase events
rapid_increase_decrease_mask = (
    (rolling_diff >= 20) &  # Rapid increase of +20 mg/dL or greater
    (df['GlucoseChange'] <= -20) &  # Rapid decrease of -20 mg/dL or more
    (df['TimeDiff'] <= 60)  # Within a timespan of 1 hour or less
)

rapid_decrease_increase_mask = (
    (rolling_diff <= -20) &  # Rapid decrease of -20 mg/dL or more
    (df['GlucoseChange'] >= 20) &  # Rapid increase of +20 mg/dL or greater
    (df['TimeDiff'] <= 60)  # Within a timespan of 1 hour or less
)

# Add rapid increase-decrease events as curves with markers
for idx, row in df[rapid_increase_decrease_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=60)  # Calculate the start time as 1 hour before the event
    
    # Filter the data to include points within the 1-hour interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',
            line=dict(color='green'),
            marker=dict(color='green'),
            name='Rapid Inc. followed by Rapid Dec.'
        ),
        row=1, col=1
    )

# Add rapid decrease-increase events as curves with markers
for idx, row in df[rapid_decrease_increase_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=60)  # Calculate the start time as 1 hour before the event
    
    # Filter the data to include points within the 1-hour interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',
            line=dict(color='purple'),
            marker=dict(color='purple'),
            name='Rapid Dec. followed by Rapid Inc.'
        ),
        row=2, col=1
    )
    
# Add threshold line to both subplots for below 60
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_60,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_60,
    line=dict(color="red", dash="dash"),
    row=1, col=1  # Add to the first subplot (increase events)
)
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_60,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_60,
    line=dict(color="red", dash="dash"),
    row=2, col=1  # Add to the second subplot (decrease events)
)

# Add rectangles for threshold-based events to both subplots
for _, event in events_df_60.iterrows():
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,  
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=1, col=1  # Add to the first subplot (increase events)
    )
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,  
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=2, col=1  # Add to the second subplot (decrease events)
    )

    
fig.update_layout(
    title='Rapid Glucose Changes Visualization with threshold below 60',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False
)

# Adjust subplot titles' positions
fig.update_annotations(
    selector=dict(text='Rapid Increase followed by Rapid Decrease in 1 hour or less'),
    yshift=5
)
fig.update_annotations(
    selector=dict(text='Rapid Decrease followed by Rapid Increase in 1 hour or less'),
    yshift=5
)

fig.update_xaxes(title_text="Datetime", row=2, col=1)
fig.show()
In [35]:
# Create subplots with two rows (one for rapid increase-decrease, one for rapid decrease-increase)
fig = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.25, subplot_titles=("Rapid Increase followed by Rapid Decrease in 1 hour or less", "Rapid Decrease followed by Rapid Increase in 1 hour or less"))

# Calculate the rolling 1-hour difference in glucose values
rolling_diff = df['Glucose value'].diff().rolling(window=12).sum()  # 12 data points in 1 hour

# Create masks for rapid increase-decrease and rapid decrease-increase events
rapid_increase_decrease_mask = (
    (rolling_diff >= 20) &  # Rapid increase of +20 mg/dL or greater
    (df['GlucoseChange'] <= -20) &  # Rapid decrease of -20 mg/dL or more
    (df['TimeDiff'] <= 60)  # Within a timespan of 1 hour or less
)

rapid_decrease_increase_mask = (
    (rolling_diff <= -20) &  # Rapid decrease of -20 mg/dL or more
    (df['GlucoseChange'] >= 20) &  # Rapid increase of +20 mg/dL or greater
    (df['TimeDiff'] <= 60)  # Within a timespan of 1 hour or less
)

# Add rapid increase-decrease events as curves with markers
for idx, row in df[rapid_increase_decrease_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=60)  # Calculate the start time as 1 hour before the event
    
    # Filter the data to include points within the 1-hour interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',
            line=dict(color='green'),
            marker=dict(color='green'),
            name='Rapid Inc. followed by Rapid Dec.'
        ),
        row=1, col=1
    )

# Add rapid decrease-increase events as curves with markers
for idx, row in df[rapid_decrease_increase_mask].iterrows():
    end_time = row['DATETIME']
    start_time = end_time - pd.Timedelta(minutes=60)  # Calculate the start time as 1 hour before the event
    
    # Filter the data to include points within the 1-hour interval
    interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=interval_data['DATETIME'],
            y=interval_data['Glucose value'],
            mode='lines+markers',
            line=dict(color='purple'),
            marker=dict(color='purple'),
            name='Rapid Dec. followed by Rapid Inc.'
        ),
        row=2, col=1
    )
    
# Add threshold line to both subplots for below 35
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_35,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_35,
    line=dict(color="red", dash="dash"),
    row=1, col=1  # Add to the first subplot (increase events)
)
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_35,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_35,
    line=dict(color="red", dash="dash"),
    row=2, col=1  # Add to the second subplot (decrease events)
)

# Add rectangles for threshold-based events to both subplots
for _, event in events_df_35.iterrows():
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300, 
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=1, col=1  # Add to the first subplot (increase events)
    )
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,  # Adjust the y1 value to fit your data range
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
        row=2, col=1  # Add to the second subplot (decrease events)
    )

    
fig.update_layout(
    title='Rapid Glucose Changes Visualization with threshold below 35',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False
)

# Adjust subplot titles' positions
fig.update_annotations(
    selector=dict(text='Rapid Increase followed by Rapid Decrease in 1 hour or less'),
    yshift=5
)
fig.update_annotations(
    selector=dict(text='Rapid Decrease followed by Rapid Increase in 1 hour or less'),
    yshift=5
)

fig.update_xaxes(title_text="Datetime", row=2, col=1)
fig.show()

Over correction plot +/- 30 mg/dl in continuous 10 min or less and threshold based events¶

In [36]:
# Identify overcorrection events
overcorrection_mask = ((df['GlucoseChange'] >= 30) | (df['GlucoseChange'] <= -30)) & (df['TimeDiff'] <= 10)

# Create an empty figure
fig = go.Figure()

# Plot each overcorrection event with exact data points
for idx, row in df[overcorrection_mask].iterrows():
    before_start_time = row['DATETIME'] - pd.Timedelta(minutes=10)
    after_end_time = row['DATETIME']
    subset = df[(df['DATETIME'] >= before_start_time) & (df['DATETIME'] <= after_end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=subset['DATETIME'],
            y=subset['Glucose value'],
            mode='lines+markers',
            line=dict(color='purple'),
            marker=dict(color='purple'),
            name='Overcorrection Event'
        )
    )

# Add dashed line at the glucose threshold
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold,
    line=dict(color="red", dash="dash"),
)

# Add rectangles for threshold-based events
for _, event in events_df.iterrows():
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,  # Adjust the y1 value to fit your data range
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
    )
    
fig.update_layout(
    title='Overcorrection plot of +/- 30 mg/dl in continuous 10 min or less and threshold above 180',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False
)
fig.show()
In [37]:
# Identify overcorrection events
overcorrection_mask = ((df['GlucoseChange'] >= 30) | (df['GlucoseChange'] <= -30)) & (df['TimeDiff'] <= 10)

# Create an empty figure
fig = go.Figure()

# Plot each overcorrection event with exact data points
for idx, row in df[overcorrection_mask].iterrows():
    before_start_time = row['DATETIME'] - pd.Timedelta(minutes=10)
    after_end_time = row['DATETIME']
    subset = df[(df['DATETIME'] >= before_start_time) & (df['DATETIME'] <= after_end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=subset['DATETIME'],
            y=subset['Glucose value'],
            mode='lines+markers',
            line=dict(color='purple'),
            marker=dict(color='purple'),
            name='Overcorrection Event'
        )
    )

# Add dashed line at the glucose threshold
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_60,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_60,
    line=dict(color="red", dash="dash"),
)

# Add rectangles for threshold-based events
for _, event in events_df_60.iterrows():
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,  # Adjust the y1 value to fit your data range
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
    )
    
fig.update_layout(
    title='Overcorrection plot of +/- 30 mg/dl in continuous 10 min or less and threshold below 60',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False
)
fig.show()
In [38]:
# Identify overcorrection events
overcorrection_mask = ((df['GlucoseChange'] >= 30) | (df['GlucoseChange'] <= -30)) & (df['TimeDiff'] <= 10)

# Create an empty figure
fig = go.Figure()

# Plot each overcorrection event with exact data points
for idx, row in df[overcorrection_mask].iterrows():
    before_start_time = row['DATETIME'] - pd.Timedelta(minutes=10)
    after_end_time = row['DATETIME']
    subset = df[(df['DATETIME'] >= before_start_time) & (df['DATETIME'] <= after_end_time)]
    
    fig.add_trace(
        go.Scatter(
            x=subset['DATETIME'],
            y=subset['Glucose value'],
            mode='lines+markers',
            line=dict(color='purple'),
            marker=dict(color='purple'),
            name='Overcorrection Event'
        )
    )

# Add dashed line at the glucose threshold
fig.add_shape(
    type="line",
    x0=df['DATETIME'].iloc[0],
    y0=threshold_35,
    x1=df['DATETIME'].iloc[-1],
    y1=threshold_35,
    line=dict(color="red", dash="dash"),
)

# Add rectangles for threshold-based events
for _, event in events_df_35.iterrows():
    fig.add_shape(
        type="rect",
        x0=event['Start'],
        y0=0,
        x1=event['End'],
        y1=300,  # Adjust the y1 value to fit your data range
        line=dict(color="red"),
        fillcolor="rgba(255, 0, 0, 0.3)",
    )
    
fig.update_layout(
    title='Over correction plot of +/- 30 mg/dl in continuous 10 min or less and threshold below 35',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=False
)
fig.show()

Dashboard creation including all features¶

In [39]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.subplots as sp
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
from dash.dependencies import Input, Output

# Load data here
df = pd.read_csv('C:/Anaconda/xdrip3-mod.csv')

mapping = {
    'DAY': 'Date',
    'TIME': 'Time',
    'UDT_CGMS': 'Glucose value'
}
df = df.rename(columns=mapping)

# Remove rows with NaN in 'Glucose value' column
df = df.dropna(subset=['Glucose value'])

# Convert 'Date' column to datetime type with correct format
df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y')
df['Time'] = pd.to_datetime(df['Time'], format='%H:%M')

# Create a time-only column without date information
df['TimeOnly'] = df['Time'].apply(lambda x: x.time())

# Combine 'Date' and 'Time' columns into a single 'Datetime' column
df['DATETIME'] = df['Date'] + pd.to_timedelta(df['Time'].dt.hour, unit='h') + pd.to_timedelta(df['Time'].dt.minute, unit='m')

# Calculate median and quartiles for each 60-minute interval
interval_length = pd.Timedelta(minutes=60)
df['Interval'] = (df['DATETIME'].dt.hour * 60 + df['DATETIME'].dt.minute) // 60
interval_stats = df.groupby(['Interval'])['Glucose value'].agg([
    np.median,
    lambda x: np.percentile(x, 10),
    lambda x: np.percentile(x, 25),
    lambda x: np.percentile(x, 75),
    lambda x: np.percentile(x, 90)
]).reset_index()
interval_stats['DATETIME'] = pd.to_timedelta(interval_stats['Interval'] * 60, unit='m')

# Define a function to convert time intervals back to meaningful time values
def convert_interval_to_time(interval):
    total_minutes = interval.total_seconds() / 60
    hours = int(total_minutes // 60)
    minutes = int(total_minutes % 60)
    return f"{hours:02d}:{minutes:02d}"

# Apply the conversion function to create x-axis labels
interval_stats['Time_Labels'] = interval_stats['DATETIME'].apply(convert_interval_to_time)

# Create the Dash app
app = dash.Dash(__name__)

# Define the app layout
app.layout = html.Div([
    dcc.Tabs([
        dcc.Tab(label='Summary', children=[
            html.Div([
                dcc.DatePickerRange(
                    id='date-picker',
                    start_date=df['DATETIME'].min().date(),
                    end_date=df['DATETIME'].max().date(),
                    display_format='YYYY-MM-DD'
                ),
                dcc.Graph(id='glucose-plot')
            ])
        ]),
        dcc.Tab(label='Time-series plot of all data', children=[
            html.Div([
                dcc.Graph(id='glucose-time-series')
            ])
        ]),
        dcc.Tab(label='Superimposed view of all data', children=[
            html.Div([
                dcc.Graph(id='daily-glucose-plot')
            ])
        ]),
        dcc.Tab(label='Daily view', children=[
            html.Div([
                dcc.Graph(id='fourth-tab-plot'),
                dcc.Slider(
                    id='date-slider',
                    min=df['Date'].min().date().toordinal(),
                    max=df['Date'].max().date().toordinal(),
                    value=df['Date'].min().date().toordinal(),
                    marks={d.toordinal(): d.strftime('%Y-%m') for i, d in enumerate(df['Date']) if d.day == 1},
                    step=1
                ),
                dcc.RangeSlider(
                    id='time-range-slider',
                    min=0,
                    max=24,
                    step=0.5,
                    value=[0, 24],
                    marks={i: f"{i}:00" for i in range(0, 25, 2)}
                )
            ])
        ]),
        dcc.Tab(label='Threshold based events', children=[
            html.Div([
                dcc.Graph(id='above-threshold-plot'),
                dcc.Graph(id='below-threshold-60-plot'),
                dcc.Graph(id='below-threshold-35-plot')
            ])
        ]),
        dcc.Tab(label='Pattern based events', children=[
            dcc.Dropdown(
                id='pattern-dropdown',
                options=[
                    {'label': 'Increase/Decrease of 20mg/dl', 'value': 'increase_decrease_20'},
                    {'label': 'Rapid increase/decrease of 20mg/dl', 'value': 'rapid_increase_decrease_20'},
                    {'label': 'Over correction of +/- 30 mg/dl', 'value': 'over_correction_30'},
                ],
                value='increase_decrease_20',
                clearable=False
            ),
            html.Div(id='pattern-event-plot')
        ])

        # ... (add more tabs as needed) ...
    ])
])

# Callback for updating the glucose plot based on selected dates
@app.callback(
    Output('glucose-plot', 'figure'),
    [Input('date-picker', 'start_date'),
     Input('date-picker', 'end_date')]
)
def update_graph(start_date, end_date):
    filtered_df = df[(df['DATETIME'].dt.date >= pd.to_datetime(start_date)) & (df['DATETIME'].dt.date <= pd.to_datetime(end_date))]
    
    # Calculate interval_stats for the filtered data
    filtered_interval_stats = filtered_df.groupby(['Interval'])['Glucose value'].agg([
        np.median,
        lambda x: np.percentile(x, 10),
        lambda x: np.percentile(x, 25),
        lambda x: np.percentile(x, 75),
        lambda x: np.percentile(x, 90)
    ]).reset_index()
    filtered_interval_stats['DATETIME'] = pd.to_timedelta(filtered_interval_stats['Interval'] * 60, unit='m')

    # Apply the conversion function to create x-axis labels
    filtered_interval_stats['Time_Labels'] = filtered_interval_stats['DATETIME'].apply(convert_interval_to_time)

    # Create traces for median, quartiles, and percentiles
    median_trace = go.Scatter(
        x=filtered_interval_stats['DATETIME'],
        y=filtered_interval_stats['median'],
        mode='lines',
        name='Median',
        line=dict(color='red', dash='dash')
    )

    p10_trace = go.Scatter(
        x=filtered_interval_stats['DATETIME'],
        y=filtered_interval_stats['<lambda_0>'],
        mode='lines',
        name='10th Percentile',
        line=dict(color='purple')
    )

    q1_trace = go.Scatter(
        x=filtered_interval_stats['DATETIME'],
        y=filtered_interval_stats['<lambda_1>'],
        mode='lines',
        name='25th Percentile',
        line=dict(color='blue')
    )

    q3_trace = go.Scatter(
        x=filtered_interval_stats['DATETIME'],
        y=filtered_interval_stats['<lambda_2>'],
        mode='lines',
        name='75th Percentile',
        line=dict(color='green')
    )

    p90_trace = go.Scatter(
        x=filtered_interval_stats['DATETIME'],
        y=filtered_interval_stats['<lambda_3>'],
        mode='lines',
        name='90th Percentile',
        line=dict(color='orange')
    )

    # Create traces for shaded regions
    shade_25_75 = go.Scatter(
        x=filtered_interval_stats['DATETIME'].append(filtered_interval_stats['DATETIME'][::-1]),
        y=filtered_interval_stats['<lambda_2>'].append(filtered_interval_stats['<lambda_1>'][::-1]),
        fill='toself',
        fillcolor='rgba(0,255,0,0.2)',  # Light green shading
        line=dict(color='rgba(255,255,255,0)'),
        name='25th to 75th Percentile'
    )

    shade_10_90 = go.Scatter(
        x=filtered_interval_stats['DATETIME'].append(filtered_interval_stats['DATETIME'][::-1]),
        y=filtered_interval_stats['<lambda_0>'].append(filtered_interval_stats['<lambda_3>'][::-1]),
        fill='toself',
        fillcolor='rgba(255,182,193,0.2)',  # Light pink shading
        line=dict(color='rgba(255,255,255,0)'),
        name='10th to 90th Percentile'
    )

    # Add dashed lines for thresholds
    thresholds = [250, 180, 70, 60, 50]
    threshold_shapes = []
    for threshold in thresholds:
        threshold_shapes.append(
            {
                'type': 'line',
                'x0': min(filtered_interval_stats['DATETIME']),
                'x1': max(filtered_interval_stats['DATETIME']),
                'y0': threshold,
                'y1': threshold,
                'line': {'color': 'gray', 'width': 1, 'dash': 'dash'},
                'name': f'Threshold {threshold}'
            }
        )
    
    # Create the layout
    layout = go.Layout(
        title='Time Series Plot with Median and Percentiles of Glucose value for each 60 min interval',
        xaxis=dict(title='Time', tickvals=interval_stats['DATETIME'], ticktext=interval_stats['Time_Labels']),
        yaxis=dict(title='Glucose value', dtick=60),  # Set dtick to 60 for y-axis
        showlegend=True 
    )
    
    fig = go.Figure(data=[median_trace, p10_trace, q1_trace, q3_trace, p90_trace, shade_25_75, shade_10_90], layout=layout)
    return fig

# Callback for updating the glucose time series plot
@app.callback(
    Output('glucose-time-series', 'figure'),
    [Input('date-picker', 'start_date'),
     Input('date-picker', 'end_date')]
)
def update_glucose_time_series(start_date, end_date):

    # Create the Plotly figure
    fig = px.line(df, x='DATETIME', y='Glucose value', title='Glucose Readings Time Series Plot')

    # Add dashed lines for thresholds
    thresholds = [180, 100, 35]
    for threshold in thresholds:
        fig.add_shape(type='line', x0=min(df['DATETIME']), x1=max(df['DATETIME']),
                      y0=threshold, y1=threshold,
                      line=dict(color='red', width=1, dash='dash'),
                      name=f'Threshold {threshold}')

    # Add a range slider for selecting the date range
    fig.update_xaxes(rangeslider_visible=True)

    return fig

# Callback for updating the daily glucose time series plot
@app.callback(
    Output('daily-glucose-plot', 'figure'),
    [Input('date-picker', 'start_date'),
     Input('date-picker', 'end_date')]
)
def update_daily_glucose_plot(start_date, end_date):
    fig = go.Figure()

    for day, data in df.groupby('Date'):
        formatted_date = f"{day.day} {day.strftime('%b')} {day.year}" 
        fig.add_trace(go.Scatter(x=data['Time'], y=data['Glucose value'],
                                 mode='lines',
                                 name=f'Glucose value - {formatted_date}'))

    # Add dashed lines for thresholds
    thresholds = [180, 100, 35]
    for threshold in thresholds:
        fig.add_shape(type='line', x0=min(df['Time']), x1=max(df['Time']),
                      y0=threshold, y1=threshold,
                      line=dict(color='red', width=1, dash='dash'),
                      name=f'Threshold {threshold}')
    
    # Update x-axis and y-axis labels
    fig.update_xaxes(title_text='Time', tickformat='%H:%M')
    fig.update_yaxes(title_text='Glucose value')

    # Set plot title
    fig.update_layout(title_text='Time Series Plot - Glucose values for all days')

    return fig

# Callback for updating the fourth tab plot based on the slider values
@app.callback(
    Output('fourth-tab-plot', 'figure'),
    [Input('date-slider', 'value'),
     Input('time-range-slider', 'value')]
)
def update_fourth_tab_plot(selected_date_ordinal, time_range):
    selected_date = pd.Timestamp.fromordinal(selected_date_ordinal)
    filtered_df = df[df['Date'] == selected_date]
    start_hour, end_hour = time_range
    
    filtered_df = filtered_df[
        (filtered_df['TimeOnly'].apply(lambda x: x.hour) >= start_hour) & 
        (filtered_df['TimeOnly'].apply(lambda x: x.hour) <= end_hour)
    ]
    
    fig = go.Figure()
    for day, data in filtered_df.groupby('Date'):
        trace = go.Scatter(x=data['TimeOnly'], y=data['Glucose value'], mode='lines', name=str(day.date()))
        fig.add_trace(trace)
    
    # Add dashed lines for thresholds
    thresholds = [180, 100, 35]
    for threshold in thresholds:
        fig.add_shape(type='line', x0=min(filtered_df['TimeOnly']), x1=max(filtered_df['TimeOnly']),
                      y0=threshold, y1=threshold,
                      line=dict(color='red', width=1, dash='dash'),
                      name=f'Threshold {threshold}')
    
    fig.update_layout(
        xaxis_title='Time',
        yaxis_title='Glucose value',
        title='Glucose Readings Time Series',
        showlegend=True
    )
    
    return fig

# Callback for updating the above-threshold events plot
@app.callback(
    Output('above-threshold-plot', 'figure'),
    [Input('date-picker', 'start_date'),
     Input('date-picker', 'end_date')]
)
def update_above_threshold_plot(start_date, end_date):
    filtered_df = df[(df['DATETIME'].dt.date >= pd.to_datetime(start_date)) & (df['DATETIME'].dt.date <= pd.to_datetime(end_date))]

    above_threshold_events = []
    threshold = 180
    continuous_minutes = 15

    current_event_start = None
    for index, row in filtered_df.iterrows():
        if row['Glucose value'] > threshold:
            if current_event_start is None:
                current_event_start = row['DATETIME']
        else:
            if current_event_start is not None:
                event_duration = (row['DATETIME'] - current_event_start).total_seconds() / 60
                if event_duration >= continuous_minutes:
                    above_threshold_events.append((current_event_start, row['DATETIME']))
                current_event_start = None

    # Convert the events to a dataframe
    events_df = pd.DataFrame(above_threshold_events, columns=['Start', 'End'])

    # Create the time series plot
    fig = px.line(filtered_df, x='DATETIME', y='Glucose value', title='Glucose value Time Series Plot for Above 180mg/dl')

    # Add dashed line at the glucose threshold
    fig.add_shape(
        type="line",
        x0=filtered_df['DATETIME'].iloc[0],
        y0=threshold,
        x1=filtered_df['DATETIME'].iloc[-1],
        y1=threshold,
        line=dict(color="red", dash="dash"),
    )

    # Add rectangles for threshold-based events
    for _, event in events_df.iterrows():
        fig.add_shape(
            type="rect",
            x0=event['Start'],
            y0=0,
            x1=event['End'],
            y1=300,  # Adjust the y1 value to fit your data range
            line=dict(color="red"),
            fillcolor="rgba(255, 0, 0, 0.3)",
        )

    return fig

# Callback for updating the below-threshold-60 events plot
@app.callback(
    Output('below-threshold-60-plot', 'figure'),
    [Input('date-picker', 'start_date'),
     Input('date-picker', 'end_date')]
)
def update_below_threshold_60_plot(start_date, end_date):
    filtered_df = df[(df['DATETIME'].dt.date >= pd.to_datetime(start_date)) & (df['DATETIME'].dt.date <= pd.to_datetime(end_date))]

    below_threshold_events_60 = []
    threshold_60 = 60
    continuous_minutes = 15

    current_event_start = None
    for index, row in filtered_df.iterrows():
        if row['Glucose value'] < threshold_60:
            if current_event_start is None:
                current_event_start = row['DATETIME']
        else:
            if current_event_start is not None:
                event_duration = (row['DATETIME'] - current_event_start).total_seconds() / 60
                if event_duration >= continuous_minutes:
                    below_threshold_events_60.append((current_event_start, row['DATETIME']))
                current_event_start = None

    # Convert the events to a dataframe
    events_df_60 = pd.DataFrame(below_threshold_events_60, columns=['Start', 'End'])

    # Create the time series plot
    fig_below_60 = px.line(filtered_df, x='DATETIME', y='Glucose value', title='Glucose value Time Series Plot below 60mg/dl')

    # Add dashed line at the glucose threshold
    fig_below_60.add_shape(
        type="line",
        x0=filtered_df['DATETIME'].iloc[0],
        y0=threshold_60,
        x1=filtered_df['DATETIME'].iloc[-1],
        y1=threshold_60,
        line=dict(color="red", dash="dash"),
    )

    # Add rectangles for threshold-based events
    for _, event in events_df_60.iterrows():
        fig_below_60.add_shape(
            type="rect",
            x0=event['Start'],
            y0=0,
            x1=event['End'],
            y1=300,  # Adjust the y1 value to fit your data range
            line=dict(color="red"),
            fillcolor="rgba(255, 0, 0, 0.3)",
        )

    return fig_below_60

# Callback for updating the below-threshold-35 events plot
@app.callback(
    Output('below-threshold-35-plot', 'figure'),
    [Input('date-picker', 'start_date'),
     Input('date-picker', 'end_date')]
)
def update_below_threshold_35_plot(start_date, end_date):
    filtered_df = df[(df['DATETIME'].dt.date >= pd.to_datetime(start_date)) & (df['DATETIME'].dt.date <= pd.to_datetime(end_date))]

    below_threshold_events_35 = []
    threshold_35 = 35
    continuous_minutes = 15

    current_event_start = None
    for index, row in filtered_df.iterrows():
        if row['Glucose value'] < threshold_35:
            if current_event_start is None:
                current_event_start = row['DATETIME']
        else:
            if current_event_start is not None:
                event_duration = (row['DATETIME'] - current_event_start).total_seconds() / 60
                if event_duration >= continuous_minutes:
                    below_threshold_events_35.append((current_event_start, row['DATETIME']))
                current_event_start = None

    # Convert the events to a dataframe
    events_df_35 = pd.DataFrame(below_threshold_events_35, columns=['Start', 'End'])

    # Create the time series plot
    fig_below_35 = px.line(filtered_df, x='DATETIME', y='Glucose value', title='Glucose value Time Series Plot below 35mg/dl')

    # Add dashed line at the glucose threshold
    fig_below_35.add_shape(
        type="line",
        x0=filtered_df['DATETIME'].iloc[0],
        y0=threshold_35,
        x1=filtered_df['DATETIME'].iloc[-1],
        y1=threshold_35,
        line=dict(color="red", dash="dash"),
    )

    # Add rectangles for threshold-based events
    for _, event in events_df_35.iterrows():
        fig_below_35.add_shape(
            type="rect",
            x0=event['Start'],
            y0=0,
            x1=event['End'],
            y1=300,  # Adjust the y1 value to fit your data range
            line=dict(color="red"),
            fillcolor="rgba(255, 0, 0, 0.3)",
        )

    return fig_below_35

# Callback for Pattern based events
@app.callback(
    Output('pattern-event-plot', 'children'),
    [Input('pattern-dropdown', 'value')]
)
def update_pattern_event_plot(selected_pattern):
    if selected_pattern == 'increase_decrease_20':
        # Calculate time differences and glucose changes
        df['TimeDiff'] = df['DATETIME'].diff().dt.total_seconds() / 60.0
        df['GlucoseChange'] = df['Glucose value'].diff()

        # Identify increases and decreases exceeding thresholds
        increase_mask = (df['GlucoseChange'] >= 20) & (df['TimeDiff'] <= 10)
        decrease_mask = (df['GlucoseChange'] <= -20) & (df['TimeDiff'] <= 10)

        df['IncreaseEvent'] = increase_mask
        df['DecreaseEvent'] = decrease_mask
        
        # Create subplots with two rows (one for increase events, one for decrease events)
        fig = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.15, subplot_titles=("Any increase of +20 mg/dl or more in continuous 10 minutes or less", "Any decrease of -20 mg/dl or more in continuous 10 minutes or less"))

        # Add increase events as curves with markers
        for idx, row in df[increase_mask].iterrows():
            end_time = row['DATETIME']
            start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event

            # Filter the data to include points within the 10-minute interval
            interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]

            fig.add_trace(
                go.Scatter(
                    x=interval_data['DATETIME'],
                    y=interval_data['Glucose value'],
                    mode='lines+markers',  # Include both lines and markers
                    line=dict(color='red'),
                    marker=dict(color='red'),  # Marker color
                    name='Increase Event'
                ),
                row=1, col=1  # Add to the first subplot (increase events)
            )

        # Add decrease events as curves with markers
        for idx, row in df[decrease_mask].iterrows():
            end_time = row['DATETIME']
            start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event

            # Filter the data to include points within the 10-minute interval
            interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]

            fig.add_trace(
                go.Scatter(
                    x=interval_data['DATETIME'],
                    y=interval_data['Glucose value'],
                    mode='lines+markers',  # Include both lines and markers
                    line=dict(color='blue'),
                    marker=dict(color='blue'),  # Marker color
                    name='Decrease Event'
                ),
                row=2, col=1  # Add to the second subplot (decrease events)
            )

        fig.update_layout(
            title='Glucose Events Visualization - Any increase of +20 mg/dl or more and Any decrease of -20 mg/dl or more',
#             xaxis_title='Datetime',
            yaxis_title='Glucose Value (mg/dl)',
            showlegend=False
        )

        # Adjust subplot titles' positions
        fig.update_annotations(
            selector=dict(text='Any increase of +20 mg/dl or more in continuous 10 minutes or less'),
            yshift=5 # Increase the vertical position for the first subplot title
        )
        fig.update_annotations(
            selector=dict(text='Any decrease of -20 mg/dl or more in continuous 10 minutes or less'),
            yshift=5  # Increase the vertical position for the second subplot title
        )

        fig.update_xaxes(title_text="Datetime", row=2, col=1)  # Add x-axis title for the second subplot
        fig.show()

    elif selected_pattern == 'rapid_increase_decrease_20':
        # Create subplots with two rows (one for rapid increase-decrease, one for rapid decrease-increase)
        fig = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.25, subplot_titles=("Rapid Increase followed by Rapid Decrease in 1 hour or less", "Rapid Decrease followed by Rapid Increase in 1 hour or less"))

        # Calculate the rolling 1-hour difference in glucose values
        rolling_diff = df['Glucose value'].diff().rolling(window=12).sum()  # 12 data points in 1 hour

        # Create masks for rapid increase-decrease and rapid decrease-increase events
        rapid_increase_decrease_mask = (
            (rolling_diff >= 20) &  # Rapid increase of +20 mg/dL or greater
            (df['GlucoseChange'] <= -20) &  # Rapid decrease of -20 mg/dL or more
            (df['TimeDiff'] <= 60)  # Within a timespan of 1 hour or less
        )

        rapid_decrease_increase_mask = (
            (rolling_diff <= -20) &  # Rapid decrease of -20 mg/dL or more
            (df['GlucoseChange'] >= 20) &  # Rapid increase of +20 mg/dL or greater
            (df['TimeDiff'] <= 60)  # Within a timespan of 1 hour or less
        )

        # Add rapid increase-decrease events as curves with markers
        for idx, row in df[rapid_increase_decrease_mask].iterrows():
            end_time = row['DATETIME']
            start_time = end_time - pd.Timedelta(minutes=60)  # Calculate the start time as 1 hour before the event

            # Filter the data to include points within the 1-hour interval
            interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]

            fig.add_trace(
                go.Scatter(
                    x=interval_data['DATETIME'],
                    y=interval_data['Glucose value'],
                    mode='lines+markers',
                    line=dict(color='green'),
                    marker=dict(color='green'),
                    name='Rapid Inc. followed by Rapid Dec.'
                ),
                row=1, col=1
            )

        # Add rapid decrease-increase events as curves with markers
        for idx, row in df[rapid_decrease_increase_mask].iterrows():
            end_time = row['DATETIME']
            start_time = end_time - pd.Timedelta(minutes=60)  # Calculate the start time as 1 hour before the event

            # Filter the data to include points within the 1-hour interval
            interval_data = df[(df['DATETIME'] >= start_time) & (df['DATETIME'] <= end_time)]

            fig.add_trace(
                go.Scatter(
                    x=interval_data['DATETIME'],
                    y=interval_data['Glucose value'],
                    mode='lines+markers',
                    line=dict(color='purple'),
                    marker=dict(color='purple'),
                    name='Rapid Dec. followed by Rapid Inc.'
                ),
                row=2, col=1
            )

        fig.update_layout(
            title='Rapid Glucose Changes Visualization',
#             xaxis_title='Datetime',
            yaxis_title='Glucose Value (mg/dl)',
            showlegend=False
        )

        # Adjust subplot titles' positions
        fig.update_annotations(
            selector=dict(text='Rapid Increase followed by Rapid Decrease in 1 hour or less'),
            yshift=5
        )
        fig.update_annotations(
            selector=dict(text='Rapid Decrease followed by Rapid Increase in 1 hour or less'),
            yshift=5
        )

        fig.update_xaxes(title_text="Datetime", row=2, col=1)
        fig.show()

    elif selected_pattern == 'over_correction_30':
        
        # Identify overcorrection events
        overcorrection_mask = ((df['GlucoseChange'] >= 30) | (df['GlucoseChange'] <= -30)) & (df['TimeDiff'] <= 10)

        # Create an empty figure
        fig = go.Figure()

        # Plot each overcorrection event with exact data points
        for idx, row in df[overcorrection_mask].iterrows():
            before_start_time = row['DATETIME'] - pd.Timedelta(minutes=10)
            after_end_time = row['DATETIME']
            subset = df[(df['DATETIME'] >= before_start_time) & (df['DATETIME'] <= after_end_time)]

            fig.add_trace(
                go.Scatter(
                    x=subset['DATETIME'],
                    y=subset['Glucose value'],
                    mode='lines+markers',
                    line=dict(color='purple'),
                    marker=dict(color='purple'),
                    name='Overcorrection Event'
                )
            )

        fig.update_layout(
            title='Over correction plot of +/- 30 mg/dl in continuous 10 min or less',
            xaxis_title='Datetime',
            yaxis_title='Glucose Value (mg/dl)',
            showlegend=False
        )
        fig.show()

    # Return the appropriate pattern-based event plot
    return dcc.Graph(figure=fig)  


# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)
C:\Anaconda\lib\site-packages\pandas\core\ops\array_ops.py:73: FutureWarning:

Comparison of Timestamp with datetime.date is deprecated in order to match the standard library behavior.  In a future version these will be considered non-comparable.Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.

C:\Anaconda\lib\site-packages\pandas\core\ops\array_ops.py:73: FutureWarning:

Comparison of Timestamp with datetime.date is deprecated in order to match the standard library behavior.  In a future version these will be considered non-comparable.Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.

C:\Anaconda\lib\site-packages\pandas\core\ops\array_ops.py:73: FutureWarning:

Comparison of Timestamp with datetime.date is deprecated in order to match the standard library behavior.  In a future version these will be considered non-comparable.Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.

C:\Anaconda\lib\site-packages\pandas\core\ops\array_ops.py:73: FutureWarning:

Comparison of Timestamp with datetime.date is deprecated in order to match the standard library behavior.  In a future version these will be considered non-comparable.Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.

In [40]:
import plotly.subplots as sp
from sklearn.cluster import KMeans
import numpy as np

# Create subplots with two rows (one for increase events, one for decrease events)
fig = sp.make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.15,
    subplot_titles=("Similar increase events clustering", "Similar decrease events clustering"),
    row_heights=[1, 1],  # Adjust the height ratios as needed
    specs=[[{"type": "scatter"}], [{"type": "scatter"}]]  # Define subplot types as "scatter"
)
# Remove rows with missing values
df_cleaned = df.dropna(subset=['GlucoseChange', 'TimeDiff'])

# Extract the relevant data for clustering (Glucose Change and TimeDiff)
increase_data = df_cleaned[df_cleaned['IncreaseEvent']][['GlucoseChange', 'TimeDiff']].values
decrease_data = df_cleaned[df_cleaned['DecreaseEvent']][['GlucoseChange', 'TimeDiff']].values

# Initialize cluster labels for rows
df_cleaned['IncreaseCluster'] = np.nan
df_cleaned['DecreaseCluster'] = np.nan

# Apply K-Means clustering separately for increase and decrease events
n_clusters = 3  #  number of clusters 
kmeans_increase = KMeans(n_clusters=n_clusters)
increase_labels = kmeans_increase.fit_predict(increase_data)
df_cleaned.loc[df_cleaned['IncreaseEvent'], 'IncreaseCluster'] = increase_labels

kmeans_decrease = KMeans(n_clusters=n_clusters)
decrease_labels = kmeans_decrease.fit_predict(decrease_data)
df_cleaned.loc[df_cleaned['DecreaseEvent'], 'DecreaseCluster'] = decrease_labels

# Define colors for clusters
cluster_colors = ['red', 'blue', 'green']

# Add increase events as curves with markers, colored by cluster
for cluster in range(n_clusters):
    cluster_mask = (df_cleaned['IncreaseEvent']) & (df_cleaned['IncreaseCluster'] == cluster)
    cluster_data = df_cleaned[cluster_mask]
    
    for idx, row in cluster_data.iterrows():
        end_time = row['DATETIME']
        start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event

        # Filter the data to include points within the 10-minute interval
        interval_data = df_cleaned[(df_cleaned['DATETIME'] >= start_time) & (df_cleaned['DATETIME'] <= end_time)]

        fig.add_trace(
            go.Scatter(
                x=interval_data['DATETIME'],
                y=interval_data['Glucose value'],
                mode='lines+markers',  # Include both lines and markers
                line=dict(color=cluster_colors[cluster]),
                marker=dict(color=cluster_colors[cluster]),  # Marker color
                name=f'Increase Cluster {cluster}'
            ),
            row=1, col=1  # Add to the first subplot (increase events)
        )

# Add decrease events as curves with markers, colored by cluster
for cluster in range(n_clusters):
    cluster_mask = (df_cleaned['DecreaseEvent']) & (df_cleaned['DecreaseCluster'] == cluster)
    cluster_data = df_cleaned[cluster_mask]
    
    for idx, row in cluster_data.iterrows():
        end_time = row['DATETIME']
        start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event

        # Filter the data to include points within the 10-minute interval
        interval_data = df_cleaned[(df_cleaned['DATETIME'] >= start_time) & (df_cleaned['DATETIME'] <= end_time)]

        fig.add_trace(
            go.Scatter(
                x=interval_data['DATETIME'],
                y=interval_data['Glucose value'],
                mode='lines+markers',  # Include both lines and markers
                line=dict(color=cluster_colors[cluster]),
                marker=dict(color=cluster_colors[cluster]),  # Marker color
                name=f'Decrease Cluster {cluster}'
            ),
            row=2, col=1  # Add to the second subplot (decrease events)
        )

fig.update_layout(
    title='Grouped Glucose Events Visualization - K-Means Clustering (Separate Clusters for Increase and Decrease)',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=True
)

# Adjust subplot titles' positions
fig.update_annotations(
    selector=dict(text='Similar increase events clustering'),
    yshift=5 # Increase the vertical position for the first subplot title
)
fig.update_annotations(
    selector=dict(text='Similar decrease events clustering'),
    yshift=5  # Increase the vertical position for the second subplot title
)

fig.update_xaxes(title_text="Datetime", row=2, col=1)  # Add x-axis title for the second subplot
fig.show()
C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\4050558211.py:23: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\4050558211.py:24: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Anaconda\lib\site-packages\pandas\core\indexing.py:1773: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Anaconda\lib\site-packages\pandas\core\indexing.py:1773: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [41]:
import plotly.subplots as sp
from sklearn.cluster import KMeans
import numpy as np
import pandas as pd

# Create subplots with two rows (one for increase events, one for decrease events)
fig_clusters = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.15, subplot_titles=("Similar increase events clustering", "Similar decrease events clustering"))

# Remove rows with missing values
df_cleaned = df.dropna(subset=['GlucoseChange', 'TimeDiff'])

# Define time ranges for segmentation
morning_range = pd.to_datetime(['06:00', '11:00'], format='%H:%M').time
noon_range = pd.to_datetime(['11:00', '16:00'], format='%H:%M').time
evening_range = pd.to_datetime(['16:00', '00:00'], format='%H:%M').time
night_range = pd.to_datetime(['00:00', '06:00'], format='%H:%M').time

# Categorize increase events into segments based on time
def categorize_time_segment(time):
    if morning_range[0] <= time <= morning_range[1]:
        return 'Morning'
    elif noon_range[0] <= time <= noon_range[1]:
        return 'Noon'
    elif evening_range[0] <= time <= evening_range[1]:
        return 'Evening'
    else:
        return 'Night'

df_cleaned['TimeSegment'] = df_cleaned['DATETIME'].dt.time.apply(categorize_time_segment)

# Extract the relevant data for clustering (Glucose Change and TimeDiff)
increase_data = df_cleaned[df_cleaned['IncreaseEvent']][['GlucoseChange', 'TimeDiff']].values
decrease_data = df_cleaned[df_cleaned['DecreaseEvent']][['GlucoseChange', 'TimeDiff']].values

# Initialize cluster labels for rows
df_cleaned['IncreaseCluster'] = np.nan
df_cleaned['DecreaseCluster'] = np.nan

# Apply K-Means clustering separately for increase and decrease events within each time segment
n_clusters = 3  

for time_segment in df_cleaned['TimeSegment'].unique():
    increase_mask = (df_cleaned['IncreaseEvent']) & (df_cleaned['TimeSegment'] == time_segment)
    decrease_mask = (df_cleaned['DecreaseEvent']) & (df_cleaned['TimeSegment'] == time_segment)
    
    increase_data_segment = df_cleaned[increase_mask][['GlucoseChange', 'TimeDiff']].values
    decrease_data_segment = df_cleaned[decrease_mask][['GlucoseChange', 'TimeDiff']].values
    
    kmeans_increase = KMeans(n_clusters=n_clusters)
    increase_labels = kmeans_increase.fit_predict(increase_data_segment)
    df_cleaned.loc[increase_mask, 'IncreaseCluster'] = increase_labels

    kmeans_decrease = KMeans(n_clusters=n_clusters)
    decrease_labels = kmeans_decrease.fit_predict(decrease_data_segment)
    df_cleaned.loc[decrease_mask, 'DecreaseCluster'] = decrease_labels

# Define colors for clusters
cluster_colors = ['red', 'blue', 'green']

# Add increase events as curves with markers, colored by cluster
for cluster in range(n_clusters):
    cluster_mask = (df_cleaned['IncreaseEvent']) & (~df_cleaned['IncreaseCluster'].isna()) & (df_cleaned['IncreaseCluster'] == cluster)
    cluster_data = df_cleaned[cluster_mask]
    
    for idx, row in cluster_data.iterrows():
        end_time = row['DATETIME']
        start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event

        # Filter the data to include points within the 10-minute interval
        interval_data = df_cleaned[(df_cleaned['DATETIME'] >= start_time) & (df_cleaned['DATETIME'] <= end_time)]

        fig_clusters.add_trace(
            go.Scatter(
                x=interval_data['DATETIME'],
                y=interval_data['Glucose value'],
                mode='lines+markers',  # Include both lines and markers
                line=dict(color=cluster_colors[cluster]),
                marker=dict(color=cluster_colors[cluster]),  # Marker color
                name=f'Increase Cluster {cluster} ({row["TimeSegment"]})'
            ),
            row=1, col=1  # Add to the first subplot (increase events)
        )

# Add decrease events as curves with markers, colored by cluster
for cluster in range(n_clusters):
    cluster_mask = (df_cleaned['DecreaseEvent']) & (~df_cleaned['DecreaseCluster'].isna()) & (df_cleaned['DecreaseCluster'] == cluster)
    cluster_data = df_cleaned[cluster_mask]
    
    for idx, row in cluster_data.iterrows():
        end_time = row['DATETIME']
        start_time = end_time - pd.Timedelta(minutes=10)  # Calculate the start time as 10 minutes before the event

        # Filter the data to include points within the 10-minute interval
        interval_data = df_cleaned[(df_cleaned['DATETIME'] >= start_time) & (df_cleaned['DATETIME'] <= end_time)]

        fig_clusters.add_trace(
            go.Scatter(
                x=interval_data['DATETIME'],
                y=interval_data['Glucose value'],
                mode='lines+markers',  # Include both lines and markers
                line=dict(color=cluster_colors[cluster]),
                marker=dict(color=cluster_colors[cluster]),  # Marker color
                name=f'Decrease Cluster {cluster} ({row["TimeSegment"]})'
            ),
            row=2, col=1  # Add to the second subplot (decrease events)
        )

fig_clusters.update_layout(
    title='Grouped Glucose Events Visualization - K-Means Clustering with Time Segments',
    xaxis_title='Datetime',
    yaxis_title='Glucose Value (mg/dl)',
    showlegend=True
)

# Adjust subplot titles' positions
fig_clusters.update_annotations(
    selector=dict(text='Similar increase events clustering'),
    yshift=5 # Increase the vertical position for the first subplot title
)
fig_clusters.update_annotations(
    selector=dict(text='Similar decrease events clustering'),
    yshift=5  # Increase the vertical position for the second subplot title
)

# Create tables for cluster centroids
def create_cluster_table(data, title):
    table = go.Figure(data=[go.Table(
        header=dict(values=["Cluster", "Glucose Change Centroid", "TimeDiff Centroid"]),
        cells=dict(values=[data.index, data['GlucoseChange'], data['TimeDiff']])
    )])
    table.update_layout(title=title)
    return table

# Create tables for increase and decrease clusters
increase_cluster_table = create_cluster_table(
    df_cleaned[df_cleaned['IncreaseEvent']].groupby('IncreaseCluster').agg({'GlucoseChange': 'mean', 'TimeDiff': 'mean'}),
    "Increase Clusters Centroids"
)
decrease_cluster_table = create_cluster_table(
    df_cleaned[df_cleaned['DecreaseEvent']].groupby('DecreaseCluster').agg({'GlucoseChange': 'mean', 'TimeDiff': 'mean'}),
    "Decrease Clusters Centroids"
)

# Show the clustering plots
fig_clusters.show()

# Show the cluster tables separately
increase_cluster_table.show()
decrease_cluster_table.show()
C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\872297380.py:29: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\872297380.py:36: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Users\Dharanija Bantu\AppData\Local\Temp\ipykernel_8412\872297380.py:37: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Anaconda\lib\site-packages\pandas\core\indexing.py:1773: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Anaconda\lib\site-packages\pandas\core\indexing.py:1773: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Anaconda\lib\site-packages\pandas\core\indexing.py:1773: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Anaconda\lib\site-packages\pandas\core\indexing.py:1773: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Anaconda\lib\site-packages\pandas\core\indexing.py:1773: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Anaconda\lib\site-packages\pandas\core\indexing.py:1773: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [42]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Step 1: Count occurrences of each cluster within time segments
increase_cluster_counts = df_cleaned[df_cleaned['IncreaseEvent']].groupby(['TimeSegment', 'IncreaseCluster']).size().reset_index(name='Count')
decrease_cluster_counts = df_cleaned[df_cleaned['DecreaseEvent']].groupby(['TimeSegment', 'DecreaseCluster']).size().reset_index(name='Count')

# Step 2: Calculate percentage of cluster occurrence within each time segment
increase_total_counts = df_cleaned[df_cleaned['IncreaseEvent']].groupby('TimeSegment').size().reset_index(name='TotalCount')
decrease_total_counts = df_cleaned[df_cleaned['DecreaseEvent']].groupby('TimeSegment').size().reset_index(name='TotalCount')

increase_cluster_counts = increase_cluster_counts.merge(increase_total_counts, on='TimeSegment')
decrease_cluster_counts = decrease_cluster_counts.merge(decrease_total_counts, on='TimeSegment')

increase_cluster_counts['Percentage'] = (increase_cluster_counts['Count'] / increase_cluster_counts['TotalCount']) * 100
decrease_cluster_counts['Percentage'] = (decrease_cluster_counts['Count'] / decrease_cluster_counts['TotalCount']) * 100

# Step 3: Create a heatmap for temporal trends
# Example: Heatmap for increase events
increase_pivot = increase_cluster_counts.pivot('IncreaseCluster', 'TimeSegment', 'Percentage')
plt.figure(figsize=(12, 6))
sns.heatmap(increase_pivot, annot=True, fmt=".2f", cmap="YlGnBu", linewidths=0.5)
plt.title('Temporal Trends of Increase Clusters')
plt.xlabel('Time Segment')
plt.ylabel('Cluster')
plt.show()

# Display tables for cluster centroids
increase_cluster_table.show()


# Step 3 : Create a heatmap for decrease events
decrease_pivot = decrease_cluster_counts.pivot('DecreaseCluster', 'TimeSegment', 'Percentage')
plt.figure(figsize=(12, 6))
sns.heatmap(decrease_pivot, annot=True, fmt=".2f", cmap="YlGnBu", linewidths=0.5)
plt.title('Temporal Trends of Decrease Clusters')
plt.xlabel('Time Segment')
plt.ylabel('Cluster')
plt.show()

decrease_cluster_table.show()
In [43]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Step 1: Count occurrences of each cluster within time segments
increase_cluster_counts = df_cleaned[df_cleaned['IncreaseEvent']].groupby(['TimeSegment', 'IncreaseCluster']).size().reset_index(name='Count')
decrease_cluster_counts = df_cleaned[df_cleaned['DecreaseEvent']].groupby(['TimeSegment', 'DecreaseCluster']).size().reset_index(name='Count')

# Step 2: Calculate percentage of cluster occurrence within each time segment
increase_total_counts = df_cleaned[df_cleaned['IncreaseEvent']].groupby('TimeSegment').size().reset_index(name='TotalCount')
decrease_total_counts = df_cleaned[df_cleaned['DecreaseEvent']].groupby('TimeSegment').size().reset_index(name='TotalCount')

increase_cluster_counts = increase_cluster_counts.merge(increase_total_counts, on='TimeSegment')
decrease_cluster_counts = decrease_cluster_counts.merge(decrease_total_counts, on='TimeSegment')

increase_cluster_counts['Percentage'] = (increase_cluster_counts['Count'] / increase_cluster_counts['TotalCount']) * 100
decrease_cluster_counts['Percentage'] = (decrease_cluster_counts['Count'] / decrease_cluster_counts['TotalCount']) * 100

# Calculate range for increase clusters across all segments
increase_cluster_range = df_cleaned[df_cleaned['IncreaseEvent']].groupby('IncreaseCluster')[['GlucoseChange']].agg(lambda x: (x.max() - x.min(), x.min(), x.max())).reset_index()
increase_cluster_range.rename(columns={'GlucoseChange': 'GlucoseChange_Range'}, inplace=True)

# Display tables for cluster centroids
print("Increase Clusters Centroids")
print(increase_cluster_counts)

# Display the range for increase clusters as a table
print("Increase Clusters Range")
print(increase_cluster_range)

# Step 3: Create a heatmap for temporal trends
# Example: Heatmap for increase events
increase_pivot = increase_cluster_counts.pivot('IncreaseCluster', 'TimeSegment', 'Percentage')
plt.figure(figsize=(12, 6))
sns.heatmap(increase_pivot, annot=True, fmt=".2f", cmap="YlGnBu", linewidths=0.5)
plt.title('Temporal Trends of Increase Clusters')
plt.xlabel('Time Segment')
plt.ylabel('Cluster')
plt.show()

# Calculate range for decrease clusters across all segments
decrease_cluster_range = df_cleaned[df_cleaned['DecreaseEvent']].groupby('DecreaseCluster')[['GlucoseChange']].agg(lambda x: (x.max() - x.min(), x.min(), x.max())).reset_index()
decrease_cluster_range.rename(columns={'GlucoseChange': 'GlucoseChange_Range'}, inplace=True)

# Display tables for cluster centroids
print("Decrease Clusters Centroids")
print(decrease_cluster_counts)

# Display the range for decrease clusters as a table
print("Decrease Clusters Range")
print(decrease_cluster_range)

# Step 3 : Create a heatmap for decrease events
decrease_pivot = decrease_cluster_counts.pivot('DecreaseCluster', 'TimeSegment', 'Percentage')
plt.figure(figsize=(12, 6))
sns.heatmap(decrease_pivot, annot=True, fmt=".2f", cmap="YlGnBu", linewidths=0.5)
plt.title('Temporal Trends of Decrease Clusters')
plt.xlabel('Time Segment')
plt.ylabel('Cluster')
plt.show()
Increase Clusters Centroids
  TimeSegment  IncreaseCluster  Count  TotalCount  Percentage
0     Morning              0.0      3          38    7.894737
1     Morning              1.0     27          38   71.052632
2     Morning              2.0      8          38   21.052632
3       Night              0.0    163         241   67.634855
4       Night              1.0     16         241    6.639004
5       Night              2.0     62         241   25.726141
6        Noon              0.0     30         104   28.846154
7        Noon              1.0     73         104   70.192308
8        Noon              2.0      1         104    0.961538
Increase Clusters Range
   IncreaseCluster GlucoseChange_Range
0              0.0  (21.0, 20.0, 41.0)
1              1.0  (22.0, 20.0, 42.0)
2              2.0  (39.0, 24.0, 63.0)
Decrease Clusters Centroids
  TimeSegment  DecreaseCluster  Count  TotalCount  Percentage
0     Morning              0.0     19          27   70.370370
1     Morning              1.0      7          27   25.925926
2     Morning              2.0      1          27    3.703704
3       Night              0.0    129         218   59.174312
4       Night              1.0     26         218   11.926606
5       Night              2.0     63         218   28.899083
6        Noon              0.0     22          79   27.848101
7        Noon              1.0     48          79   60.759494
8        Noon              2.0      9          79   11.392405
Decrease Clusters Range
   DecreaseCluster   GlucoseChange_Range
0              0.0   (8.0, -28.0, -20.0)
1              1.0  (36.0, -56.0, -20.0)
2              2.0  (20.0, -46.0, -26.0)
C:\Anaconda\lib\site-packages\pandas\core\ops\array_ops.py:73: FutureWarning:

Comparison of Timestamp with datetime.date is deprecated in order to match the standard library behavior.  In a future version these will be considered non-comparable.Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.

In [ ]: